Skip to content

Conversation

@clementval
Copy link
Contributor

Convert the `cuf.sync_descriptor` op to a call to the new runtime entry point `CUFSyncGlobalDescriptor`.

@llvmbot llvmbot added flang:runtime flang Flang issues not falling into any other category flang:fir-hlfir labels Jan 2, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 2, 2025

@llvm/pr-subscribers-flang-fir-hlfir

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Convert the `cuf.sync_descriptor` op to a call to the new runtime entry point `CUFSyncGlobalDescriptor`.


Full diff: https://github.com/llvm/llvm-project/pull/121524.diff

4 Files Affected:

  • (modified) flang/include/flang/Runtime/CUDA/descriptor.h (+4)
  • (modified) flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+41-1)
  • (modified) flang/runtime/CUDA/descriptor.cpp (+7)
  • (added) flang/test/Fir/CUDA/cuda-sync-desc.mlir (+20)
diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h
index 55878aaac57fb3..0ee7feca10e44c 100644
--- a/flang/include/flang/Runtime/CUDA/descriptor.h
+++ b/flang/include/flang/Runtime/CUDA/descriptor.h
@@ -33,6 +33,10 @@ void *RTDECL(CUFGetDeviceAddress)(
 void RTDECL(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
     const char *sourceFile = nullptr, int sourceLine = 0);
 
+/// Sync the descriptor at \p hostPtr to its registered device address.
+void RTDECL(CUFSyncGlobalDescriptor)(
+    void *hostPtr, const char *sourceFile = nullptr, int sourceLine = 0);
+
 } // extern "C"
 
 } // namespace Fortran::runtime::cuda
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index fb0ef246546444..f08f9e412b8857 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -788,6 +788,45 @@ struct CUFLaunchOpConversion
   const mlir::SymbolTable &symTab;
 };
 
+/// Lower cuf.sync_descriptor to a call to the CUFSyncGlobalDescriptor runtime
+/// entry point, passing the host address of the named global together with
+/// the source file and line of the op.
+struct CUFSyncDescriptorOpConversion
+    : public mlir::OpRewritePattern<cuf::SyncDescriptorOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  /// The symbol table parameter is accepted so this pattern can be registered
+  /// with the same arguments as CUFLaunchOpConversion, but it is deliberately
+  /// not stored: the global is looked up through the parent module, and an
+  /// unused private member fails the build with -Werror
+  /// (-Wunused-private-field).
+  CUFSyncDescriptorOpConversion(mlir::MLIRContext *context,
+                                const mlir::SymbolTable & /*symTab*/)
+      : OpRewritePattern(context) {}
+
+  /// Replace \p op with the runtime call. Returns failure, leaving the op
+  /// untouched, when the referenced symbol is not a fir.global in the parent
+  /// module.
+  mlir::LogicalResult
+  matchAndRewrite(cuf::SyncDescriptorOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    auto mod = op->getParentOfType<mlir::ModuleOp>();
+    fir::FirOpBuilder builder(rewriter, mod);
+    mlir::Location loc = op.getLoc();
+
+    auto globalOp = mod.lookupSymbol<fir::GlobalOp>(op.getGlobalName());
+    if (!globalOp)
+      return mlir::failure();
+
+    // Host-side address of the global; this is the runtime's hostPtr argument.
+    auto hostAddr = builder.create<fir::AddrOfOp>(
+        loc, fir::ReferenceType::get(globalOp.getType()), op.getGlobalName());
+    mlir::func::FuncOp callee =
+        fir::runtime::getRuntimeFunc<mkRTKey(CUFSyncGlobalDescriptor)>(loc,
+                                                                       builder);
+    auto fTy = callee.getFunctionType();
+    mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+    // Input #2 of the runtime signature is the source line.
+    mlir::Value sourceLine =
+        fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
+    llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+        builder, loc, fTy, hostAddr, sourceFile, sourceLine)};
+    builder.create<fir::CallOp>(loc, callee, args);
+    // Erase through the rewriter so the pattern driver is notified of the
+    // removal instead of mutating the IR behind its back.
+    rewriter.eraseOp(op);
+    return mlir::success();
+  }
+};
+
 class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
 public:
   void runOnOperation() override {
@@ -851,7 +890,8 @@ void cuf::populateCUFToFIRConversionPatterns(
                   CUFFreeOpConversion>(patterns.getContext());
   patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab,
                                                &dl, &converter);
-  patterns.insert<CUFLaunchOpConversion>(patterns.getContext(), symtab);
+  patterns.insert<CUFLaunchOpConversion, CUFSyncDescriptorOpConversion>(
+      patterns.getContext(), symtab);
 }
 
 void cuf::populateFIRCUFConversionPatterns(const mlir::SymbolTable &symtab,
diff --git a/flang/runtime/CUDA/descriptor.cpp b/flang/runtime/CUDA/descriptor.cpp
index 391c47e84241d4..947eeb66aa3d6c 100644
--- a/flang/runtime/CUDA/descriptor.cpp
+++ b/flang/runtime/CUDA/descriptor.cpp
@@ -46,6 +46,13 @@ void RTDEF(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
       (void *)dst, (const void *)src, count, cudaMemcpyHostToDevice));
 }
 
+void RTDEF(CUFSyncGlobalDescriptor)(
+    void *hostPtr, const char *sourceFile, int sourceLine) {
+  // Look up the device address associated with this host descriptor, then
+  // hand both descriptors to CUFDescriptorSync (device one is the
+  // destination, host one is the source).
+  auto *deviceDesc{static_cast<Descriptor *>(
+      RTNAME(CUFGetDeviceAddress)(hostPtr, sourceFile, sourceLine))};
+  auto *hostDesc{static_cast<Descriptor *>(hostPtr)};
+  RTNAME(CUFDescriptorSync)(deviceDesc, hostDesc, sourceFile, sourceLine);
+}
+
 RT_EXT_API_GROUP_END
 }
 } // namespace Fortran::runtime::cuda
diff --git a/flang/test/Fir/CUDA/cuda-sync-desc.mlir b/flang/test/Fir/CUDA/cuda-sync-desc.mlir
new file mode 100644
index 00000000000000..20b317f34a7f26
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-sync-desc.mlir
@@ -0,0 +1,20 @@
+// RUN: fir-opt --cuf-convert %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 ([email protected]:clementval/llvm-project.git f37e52237791f58438790c77edeb8de08f692987)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  fir.global @_QMdevptrEdev_ptr {data_attr = #cuf.cuda<device>} : !fir.box<!fir.ptr<!fir.array<?xf32>>> { // device-attributed global: box around a zeroed pointer with a zero-extent shape
+    %0 = fir.zero_bits !fir.ptr<!fir.array<?xf32>>
+    %c0 = arith.constant 0 : index
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) {allocator_idx = 2 : i32} : (!fir.ptr<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.ptr<!fir.array<?xf32>>>
+  }
+  func.func @_QQmain() {
+    cuf.sync_descriptor @_QMdevptrEdev_ptr // op under test; the CHECK lines expect a call to _FortranACUFSyncGlobalDescriptor
+    return
+  }
+}
+
+// CHECK-LABEL: func.func @_QQmain()
+// CHECK: %[[HOST_ADDR:.*]] = fir.address_of(@_QMdevptrEdev_ptr) : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+// CHECK: %[[HOST_ADDR_PTR:.*]] = fir.convert %[[HOST_ADDR]] : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.llvm_ptr<i8>
+// CHECK: fir.call @_FortranACUFSyncGlobalDescriptor(%[[HOST_ADDR_PTR]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32)

@clementval clementval changed the base branch from users/clementval/cuf_global_pointer to main January 3, 2025 01:02
@clementval clementval merged commit 6dcd2b0 into llvm:main Jan 3, 2025
12 checks passed
@clementval clementval deleted the cuf_global_pointer2 branch January 3, 2025 01:08
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 3, 2025

LLVM Buildbot has detected a new failure on builder ppc64le-flang-rhel-clang running on ppc64le-flang-rhel-test while building flang at step 5 "build-unified-tree".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/157/builds/16247

Here is the relevant piece of the build log for the reference
Step 5 (build-unified-tree) failure: build (failure)
...
64.956 [128/11/6546] Linking CXX shared module lib/BugpointPasses.so
65.218 [128/10/6547] Linking CXX executable bin/llvm-reduce
65.325 [128/9/6548] Linking CXX executable bin/llvm-opt-fuzzer
65.820 [128/8/6549] Linking CXX executable bin/clang-extdef-mapping
67.055 [128/7/6550] Linking CXX executable bin/clang-scan-deps
67.412 [128/6/6551] Linking CXX shared library lib/libclang.so.20.0.0git
67.474 [127/6/6552] Creating library symlink lib/libclang.so.20.0git lib/libclang.so
68.114 [125/7/6553] Linking CXX executable bin/c-arcmt-test
68.244 [125/6/6554] Linking CXX executable bin/clang-check
68.879 [125/5/6555] Building CXX object tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o
FAILED: tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o 
ccache /home/buildbots/llvm-external-buildbots/clang.16.0.1/bin/clang++ -DFLANG_INCLUDE_TESTS=1 -DFLANG_LITTLE_ENDIAN=1 -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/tools/flang/lib/Optimizer/Transforms -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/lib/Optimizer/Transforms -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/include -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/tools/flang/include -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/include -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/llvm/include -isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/llvm/../mlir/include -isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/tools/mlir/include -isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/tools/clang/include -isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/llvm/../clang/include -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor 
-Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -Werror -Wno-deprecated-copy -Wno-string-conversion -Wno-ctad-maybe-unsupported -Wno-unused-command-line-argument -Wstring-conversion           -Wcovered-switch-default -Wno-nested-anon-types -O3 -DNDEBUG -std=c++17  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o -MF tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o.d -o tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o -c /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp:827:28: error: private field 'symTab' is not used [-Werror,-Wunused-private-field]
  const mlir::SymbolTable &symTab;
                           ^
1 error generated.
69.024 [125/4/6556] Linking CXX executable bin/mlir-cpu-runner
70.822 [125/3/6557] Linking CXX executable bin/c-index-test
70.825 [125/2/6558] Building CXX object lib/LTO/CMakeFiles/LLVMLTO.dir/LTO.cpp.o
177.679 [125/1/6559] Building CXX object tools/flang/lib/Optimizer/CodeGen/CMakeFiles/FIRCodeGen.dir/CodeGen.cpp.o
ninja: build stopped: subcommand failed.

clementval added a commit that referenced this pull request Jan 3, 2025
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 3, 2025

LLVM Buildbot has detected a new failure on builder ppc64-flang-aix running on ppc64-flang-aix-test while building flang at step 5 "build-unified-tree".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/201/builds/502

Here is the relevant piece of the build log for the reference
Step 5 (build-unified-tree) failure: build (failure)
...
-- Compiler-RT supported architectures: powerpc;powerpc64
-- check-shadowcallstack does nothing.
-- Configuring done (2.5s)
-- Generating done (0.2s)
-- Build files have been written to: /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/runtimes/runtimes-bins
12.927 [28/3/9] Performing build step for 'runtimes'
0.754 [0/1/1] Generating /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/compile_commands.json
13.829 [27/3/10] No install step for 'runtimes'
14.100 [26/3/12] Completed 'runtimes'
27.512 [26/2/13] Building CXX object tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o
FAILED: tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o 
/home/llvm/llvm-external-buildbots/clang.17.0.2/bin/clang++ -DFLANG_BIG_ENDIAN=1 -DFLANG_INCLUDE_TESTS=1 -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_LARGE_FILE_API -D_XOPEN_SOURCE=700 -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/tools/flang/lib/Optimizer/Transforms -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/flang/lib/Optimizer/Transforms -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/flang/include -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/tools/flang/include -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/include -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/llvm/include -isystem /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/llvm/../mlir/include -isystem /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/tools/mlir/include -isystem /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/tools/clang/include -isystem /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/llvm/../clang/include -mcmodel=large -fPIC -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -Werror -Wno-deprecated-copy -Wno-string-conversion 
-Wno-ctad-maybe-unsupported -Wno-unused-command-line-argument -Wstring-conversion           -Wcovered-switch-default -Wno-nested-anon-types -O3 -DNDEBUG -std=c++17  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o -MF tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o.d -o tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o -c /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp:827:28: error: private field 'symTab' is not used [-Werror,-Wunused-private-field]
  827 |   const mlir::SymbolTable &symTab;
      |                            ^
1 error generated.
94.484 [26/1/14] Building CXX object tools/flang/lib/Optimizer/CodeGen/CMakeFiles/FIRCodeGen.dir/CodeGen.cpp.o
ninja: build stopped: subcommand failed.

github-actions bot pushed a commit to arm/arm-toolchain that referenced this pull request Jan 10, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

flang:fir-hlfir flang:runtime flang Flang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants